#### Produce the GDD-EDD dose-response curves

### Prepare environment

setwd("~/research/coffee-dataverse/src")

do.origspec <- T

source("load.R", encoding="iso-8859-1")

library(ggplot2)
library(lfe)
library(splines)
source("felm-tools.R")
source("conley.R")

### Prepare the data

df$ns.year1 <- ns(df$year, df=3)[, 1]
df$ns.year2 <- ns(df$year, df=3)[, 2]
df$ns.year3 <- ns(df$year, df=3)[, 3]

## Expand state:ns entries
for (state in unique(df$state)) {
    df[, paste0(state, 'xns.year1')] <- (df$state == state) * df$ns.year1
    df[, paste0(state, 'xns.year2')] <- (df$state == state) * df$ns.year2
    df[, paste0(state, 'xns.year3')] <- (df$state == state) * df$ns.year3
}

paste(paste(paste0(unique(df$state), 'xns.year1'), collapse=' + '),
      paste(paste0(unique(df$state), 'xns.year2'), collapse=' + '),
      paste(paste0(unique(df$state), 'xns.year3'), collapse=' + '), sep=' + ')

if (do.origspec) {
    summary(felm(logyield ~ tmin + gdd1000 + edd1000 + prcp + prcp2 + ACxns.year1 + AMxns.year1 + PAxns.year1 + APxns.year1 + TOxns.year1 + MAxns.year1 + PIxns.year1 + CExns.year1 + PBxns.year1 + PExns.year1 + ALxns.year1 + SExns.year1 + BAxns.year1 + MGxns.year1 + ESxns.year1 + RJxns.year1 + SPxns.year1 + PRxns.year1 + SCxns.year1 + MSxns.year1 + MTxns.year1 + GOxns.year1 + DFxns.year1 + ROxns.year1 + RNxns.year1 + ACxns.year2 + AMxns.year2 + PAxns.year2 + APxns.year2 + TOxns.year2 + MAxns.year2 + PIxns.year2 + CExns.year2 + PBxns.year2 + PExns.year2 + ALxns.year2 + SExns.year2 + BAxns.year2 + MGxns.year2 + ESxns.year2 + RJxns.year2 + SPxns.year2 + PRxns.year2 + SCxns.year2 + MSxns.year2 + MTxns.year2 + GOxns.year2 + DFxns.year2 + ROxns.year2 + RNxns.year2 + ACxns.year3 + AMxns.year3 + PAxns.year3 + APxns.year3 + TOxns.year3 + MAxns.year3 + PIxns.year3 + CExns.year3 + PBxns.year3 + PExns.year3 + ALxns.year3 + SExns.year3 + BAxns.year3 + MGxns.year3 + ESxns.year3 + RJxns.year3 + SPxns.year3 + PRxns.year3 + SCxns.year3 + MSxns.year3 + MTxns.year3 + GOxns.year3 + DFxns.year3 + ROxns.year3 + RNxns.year3 | Munic�pio + year | 0 | Munic�pio + year, data=df))

    mod.hi <- felm(logyield ~ tmin + gdd1000 + edd1000 + prcp + prcp2 + ACxns.year1 + AMxns.year1 + PAxns.year1 + APxns.year1 + TOxns.year1 + MAxns.year1 + PIxns.year1 + CExns.year1 + PBxns.year1 + PExns.year1 + ALxns.year1 + BAxns.year1 + MGxns.year1 + ESxns.year1 + RJxns.year1 + SPxns.year1 + PRxns.year1 + SCxns.year1 + MSxns.year1 + MTxns.year1 + GOxns.year1 + DFxns.year1 + ROxns.year1 + ACxns.year2 + AMxns.year2 + PAxns.year2 + TOxns.year2 + MAxns.year2 + PIxns.year2 + CExns.year2 + PBxns.year2 + PExns.year2 + ALxns.year2 + BAxns.year2 + MGxns.year2 + ESxns.year2 + RJxns.year2 + SPxns.year2 + PRxns.year2 + SCxns.year2 + MSxns.year2 + MTxns.year2 + GOxns.year2 + DFxns.year2 + ROxns.year2 + RNxns.year2 + ACxns.year3 + AMxns.year3 + PAxns.year3 + TOxns.year3 + PIxns.year3 + CExns.year3 + PBxns.year3 + PExns.year3 + ALxns.year3 + BAxns.year3 + MGxns.year3 + ESxns.year3 + RJxns.year3 + SPxns.year3 + PRxns.year3 + SCxns.year3 + MSxns.year3 + MTxns.year3 + GOxns.year3 + DFxns.year3 + ROxns.year3 | Munic�pio + year | 0 | Munic�pio + year, data=df, keepCX=T)
    mod.con <- ConleySEs(mod.hi, df[-mod.hi$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
} else {
    summary(felm(logyield ~ tmin + tmax + gdd1000 + edd1000 + prcp + prcp2 + ACxns.year1 + AMxns.year1 + PAxns.year1 + APxns.year1 + TOxns.year1 + MAxns.year1 + PIxns.year1 + CExns.year1 + PBxns.year1 + PExns.year1 + ALxns.year1 + SExns.year1 + BAxns.year1 + MGxns.year1 + ESxns.year1 + RJxns.year1 + SPxns.year1 + PRxns.year1 + SCxns.year1 + MSxns.year1 + MTxns.year1 + GOxns.year1 + DFxns.year1 + ROxns.year1 + RNxns.year1 + ACxns.year2 + AMxns.year2 + PAxns.year2 + APxns.year2 + TOxns.year2 + MAxns.year2 + PIxns.year2 + CExns.year2 + PBxns.year2 + PExns.year2 + ALxns.year2 + SExns.year2 + BAxns.year2 + MGxns.year2 + ESxns.year2 + RJxns.year2 + SPxns.year2 + PRxns.year2 + SCxns.year2 + MSxns.year2 + MTxns.year2 + GOxns.year2 + DFxns.year2 + ROxns.year2 + RNxns.year2 + ACxns.year3 + AMxns.year3 + PAxns.year3 + APxns.year3 + TOxns.year3 + MAxns.year3 + PIxns.year3 + CExns.year3 + PBxns.year3 + PExns.year3 + ALxns.year3 + SExns.year3 + BAxns.year3 + MGxns.year3 + ESxns.year3 + RJxns.year3 + SPxns.year3 + PRxns.year3 + SCxns.year3 + MSxns.year3 + MTxns.year3 + GOxns.year3 + DFxns.year3 + ROxns.year3 + RNxns.year3 | Munic�pio + year | 0 | Munic�pio + year, data=df))

    mod.hi <- felm(logyield ~ tmin + tmax + gdd1000 + edd1000 + prcp + prcp2 + ACxns.year1 + AMxns.year1 + PAxns.year1 + APxns.year1 + TOxns.year1 + MAxns.year1 + PIxns.year1 + CExns.year1 + PBxns.year1 + PExns.year1 + ALxns.year1 + BAxns.year1 + MGxns.year1 + ESxns.year1 + RJxns.year1 + SPxns.year1 + PRxns.year1 + SCxns.year1 + MSxns.year1 + MTxns.year1 + GOxns.year1 + DFxns.year1 + ROxns.year1 + ACxns.year2 + AMxns.year2 + PAxns.year2 + TOxns.year2 + MAxns.year2 + PIxns.year2 + CExns.year2 + PBxns.year2 + PExns.year2 + ALxns.year2 + BAxns.year2 + MGxns.year2 + ESxns.year2 + RJxns.year2 + SPxns.year2 + PRxns.year2 + SCxns.year2 + MSxns.year2 + MTxns.year2 + GOxns.year2 + DFxns.year2 + ROxns.year2 + RNxns.year2 + ACxns.year3 + AMxns.year3 + PAxns.year3 + TOxns.year3 + PIxns.year3 + CExns.year3 + PBxns.year3 + PExns.year3 + ALxns.year3 + BAxns.year3 + MGxns.year3 + ESxns.year3 + RJxns.year3 + SPxns.year3 + PRxns.year3 + SCxns.year3 + MSxns.year3 + MTxns.year3 + GOxns.year3 + DFxns.year3 + ROxns.year3 | Munic�pio + year | 0 | Munic�pio + year, data=df, keepCX=T)
    mod.con <- ConleySEs(mod.hi, df[-mod.hi$na.action,], "Munic�pio", "year", "latitude", "longitude")
}

TT <- seq(0, 40, length.out=100)
tavg <- sum(df$total.produce * df$tavg, na.rm=T) / sum(df$total.produce * !is.na(df$tavg), na.rm=T) - 273.15
tavg <- 20 # very close to this, easier to interpret
preddf <- data.frame(T=TT, tmin=(TT - tavg) / perioddays,
                     tmax=(TT - tavg) / perioddays,
                     gdd1000=(ifelse(TT > thresholds[1], ifelse(TT < thresholds[2], TT - thresholds[1], diff(thresholds)), 0) - ifelse(tavg > thresholds[1], ifelse(tavg < thresholds[2], tavg - thresholds[1], diff(thresholds)), 0)) / 1000, edd1000=ifelse(TT > thresholds[2], TT - thresholds[2], 0) / 1000, prcp=0, prcp2=0)
for (state in unique(df$state)) {
    preddf[, paste0(state, 'xns.year1')] <- 0
    preddf[, paste0(state, 'xns.year2')] <- 0
    preddf[, paste0(state, 'xns.year3')] <- 0
}

plotdf.hi <- predict.felm(mod.hi, preddf, interval='confidence')
plotdf.hi$T <- TT
plotdf.con <- predict.felm(mod.hi, preddf, interval='confidence', special.vcov=mod.con$Spatial_HAC)
plotdf.con$T <- TT

ggplot(plotdf.hi, aes(T, fit)) +
    geom_hline(yintercept=0, colour='grey50') +
    geom_line() + geom_ribbon(aes(ymin=lwr, ymax=upr), alpha=.33) +
    geom_ribbon(data=plotdf.con, aes(ymin=lwr, ymax=upr), alpha=.33) +
    coord_cartesian(ylim=c(-.03, .01)) +
    theme_bw() + xlab("Daily Temperature (C)") + ylab("Incremental Log Yield") +
    scale_x_continuous(expand=c(0, 0))
ggsave(paste0("../figures/regs/gddmod-single", output.suffix, ".pdf"), width=4, height=2)

df$logyield.arabica <- log(df$arabica.produce / df$arabica.harvest)
df$logyield.arabica[!is.finite(df$logyield.arabica)] <- NA

df$logyield.robusta <- log(df$robusta.produce / df$robusta.harvest)
df$logyield.robusta[!is.finite(df$logyield.robusta)] <- NA

if (do.origspec) {
    mod.arabica.hi <- felm(logyield.arabica ~ tmin + gdd1000 + edd1000 + prcp + prcp2 + ACxns.year1 + CExns.year1 + PExns.year1 + BAxns.year1 + MGxns.year1 + ESxns.year1 + RJxns.year1 + SPxns.year1 + PRxns.year1 + MSxns.year1 + MTxns.year1 + GOxns.year1 + DFxns.year1 + AMxns.year3 + CExns.year3 + PExns.year3 + BAxns.year3 + MGxns.year3 + ESxns.year3 + RJxns.year3 + SPxns.year3 + PRxns.year3 + MSxns.year3 + MTxns.year3 + GOxns.year3 + DFxns.year3 | Munic�pio + year | 0 | Munic�pio + year, data=df, keepCX=T)
    mod.robusta.hi <- felm(logyield.robusta ~ tmin + tmax + gdd1000 + edd1000 + prcp + prcp2 + ACxns.year1 + AMxns.year1 + PAxns.year1 + BAxns.year1 + MGxns.year1 + ESxns.year1 + SPxns.year1 + MTxns.year1 + ROxns.year1 + ACxns.year3 + AMxns.year3 + PAxns.year3 + CExns.year3 + BAxns.year3 + MGxns.year3 + ESxns.year3 + MTxns.year3 + ROxns.year3 | Munic�pio + year | 0 | Munic�pio + year, data=df, keepCX=T)
    mod.arabica.con <- ConleySEs(mod.arabica.hi, df[-mod.arabica.hi$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
    mod.robusta.con <- ConleySEs(mod.robusta.hi, df[-mod.robusta.hi$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
} else {
    mod.arabica.hi <- felm(logyield.arabica ~ tmin + tmax + gdd1000 + edd1000 + prcp + prcp2 + ACxns.year1 + CExns.year1 + PExns.year1 + BAxns.year1 + MGxns.year1 + ESxns.year1 + RJxns.year1 + SPxns.year1 + PRxns.year1 + MSxns.year1 + MTxns.year1 + GOxns.year1 + DFxns.year1 | Munic�pio + year | 0 | Munic�pio + year, data=df, keepCX=T)
    mod.robusta.hi <- felm(logyield.robusta ~ tmin + tmax + gdd1000 + edd1000 + prcp + prcp2 + ACxns.year1 + AMxns.year1 + PAxns.year1 + BAxns.year1 + MGxns.year1 + ESxns.year1 + SPxns.year1 + MTxns.year1 + ROxns.year1 | Munic�pio + year | 0 | Munic�pio + year, data=df, keepCX=T)
    mod.arabica.con <- ConleySEs(mod.arabica.hi, df[-mod.arabica.hi$na.action,], "Munic�pio", "year", "latitude", "longitude")
    mod.robusta.con <- ConleySEs(mod.robusta.hi, df[-mod.robusta.hi$na.action,], "Munic�pio", "year", "latitude", "longitude")
}

plotdf.arabica.hi <- predict.felm(mod.arabica.hi, preddf, interval='confidence')
plotdf.arabica.hi$T <- TT

plotdf.robusta.hi <- predict.felm(mod.robusta.hi, preddf, interval='confidence')
plotdf.robusta.hi$T <- TT

plotdf.arabica.con <- predict.felm(mod.arabica.hi, preddf, interval='confidence', special.vcov=mod.arabica.con$Spatial_HAC)
plotdf.arabica.con$T <- TT

plotdf.robusta.con <- predict.felm(mod.robusta.hi, preddf, interval='confidence', special.vcov=mod.robusta.con$Spatial_HAC)
plotdf.robusta.con$T <- TT

ggplot(rbind(cbind(group='Combined', plotdf.hi),
             cbind(group='Only Arabica', plotdf.arabica.hi),
             cbind(group='Only Robusta', plotdf.robusta.hi)), aes(T, fit)) +
    facet_wrap(~ group, ncol=3) +
    geom_hline(yintercept=0, colour='grey50') +
    geom_line() + geom_ribbon(aes(ymin=lwr, ymax=upr), alpha=.33) +
    geom_ribbon(data=rbind(cbind(group='Combined', plotdf.con),
                           cbind(group='Only Arabica', plotdf.arabica.con),
                           cbind(group='Only Robusta', plotdf.robusta.con)),
                aes(ymin=lwr, ymax=upr), alpha=.33) +
    coord_cartesian(ylim=c(-.03, .01)) +
    theme_bw() + xlab("Daily Temperature (C)") + ylab("Incremental Log Yield") +
    scale_x_continuous(expand=c(0, 0)) + theme(panel.spacing=unit(1, "lines"))
ggsave(paste0("../figures/regs/gddmod", output.suffix, ".pdf"), width=8, height=2.5)

ggplot(data.frame(group=rep(c('Combined', 'Only Arabica', 'Only Robusta', 'Combined', 'Only Arabica', 'Only Robusta'), each=nrow(df)),
                  T=c(df$tmin, ifelse(is.na(df$arabica.harvest), NA, df$tmin),
                      ifelse(is.na(df$robusta.harvest), NA, df$tmin),
                      df$tmax, ifelse(is.na(df$arabica.harvest), NA, df$tmax),
                      ifelse(is.na(df$robusta.harvest), NA, df$tmax)) - 273.15), aes(T)) +
    facet_wrap(~ group, ncol=3) +
    geom_histogram() + scale_y_continuous(labels=scales::percent) +
    theme_bw() + xlab("Daily Temperature (C)") + ylab("Number of Half-Days") +
    scale_x_continuous(expand=c(0, 0)) + theme(panel.spacing=unit(1, "lines")) +
    coord_cartesian(xlim=c(0, 40)) + scale_y_log10()
ggsave("../figures/regs/gddmod-hist.pdf", width=8, height=2)
